import os
import openai
import random
import numpy as np
import json
import jsonlines
import time
from tqdm import tqdm
from rank_bm25 import BM25Okapi
import threading

# The API key is read from the environment so that no secret lives in source
# control. Export OPENAI_API_KEY before running this script.
# NOTE(security): any key that was ever committed to this file must be treated
# as compromised and revoked in the OpenAI dashboard.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY

# One-shot worked example that is appended verbatim to the end of every prompt.
# It demonstrates the expected four-step comparison format (Initial Perception,
# Recognizing Incongruity, Contextual Analysis, Linking to the Question) using
# the cue 'lap'. This text is sent to the model as-is, so any edit here changes
# the prompt.
Example_prompt = '''
Here is the example, please follow this example.
Step 1: Initial Perception
Option A: Captions about various objects and scenes, none directly suggesting the concept of 'lap'.
Option B: Captions also diverse, but one explicitly mentions 'lap' in two contexts - 'three racing cars on a lap of a track' and "a little girl sits on her father's lap".
Which is better? Option B. It includes explicit references to 'lap', fitting the cue more directly.

Step 2: Recognizing Incongruity
Option A: The captions are incongruous with the cue 'lap'. They don't relate to the concept of a lap in any context.
Option B: Most captions are incongruous with 'lap', except for the two that explicitly mention it.
Which is better? Option B. It has two captions directly related to 'lap', making it more congruent with the cue.

Step 3: Contextual Analysis
Option A: None of the captions relate to the typical contexts in which 'lap' is used (either as part of a track or in reference to sitting on someone's lap).
Option B: Offers two relevant contexts: racing laps and sitting on a lap.
Which is better? Option B. It aligns well with the different meanings of 'lap', unlike Option A.

Step 4: Linking to the Question
Option A: Fails to link any of its captions to the cue 'lap'.
Option B: Directly addresses the question with two captions that are explicitly related to 'lap'.
Which is better? Option B. Its inclusion of captions specifically related to 'lap' directly answers the question.
'''


def ask_gpt4(question, thread_id, file_lock, line, unanswered_questions):
    """Send one prompt to GPT-4 and append the answered record to the output file.

    Args:
        question: Prompt text, sent as a single user message.
        thread_id: Identifier of the calling worker; it is only recorded next
            to the question when every attempt fails.
        file_lock: Lock that serialises appends to the shared output file.
        line: Record (dict) for this sample. On success it gains a
            ``'gpt4_rate'`` key holding the model's reply and is written out
            as one JSON line.
        unanswered_questions: List that receives ``(question, thread_id, line)``
            when the retry budget is exhausted without an answer.

    Raises:
        Exception: If the API fails with an error other than a rate limit or a
            timeout. The original OpenAI error is chained as the cause.
    """
    output_path = './gpt4_ans/winogavil/anscot/swow/test.jsonl'
    messages = [{"role": "user", "content": question}]
    max_time = 20

    for attempt_time in range(max_time):
        # Keep the try body limited to the network call so that unrelated
        # failures (e.g. while writing the file) are never mistaken for API errors.
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4",
                max_tokens=1000,
                temperature=1.2,
                messages=messages)
        except (openai.error.RateLimitError, openai.error.Timeout):
            # Transient failure (rate limit or request timeout): back off
            # exponentially, capped at 10 seconds, then try again.
            time.sleep(min(0.2 * 2 ** attempt_time, 10.0))
            continue
        except openai.error.OpenAIError as err:
            # Any other API error is treated as fatal; keep the root cause
            # attached so the traceback shows what actually went wrong.
            raise Exception("Sorry, a problem happened") from err

        answer = response["choices"][0]["message"]["content"]
        with file_lock:
            # Make sure the target directory exists so a paid-for answer is
            # not lost to a FileNotFoundError on the first write.
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            with open(output_path, 'a', encoding='utf-8') as outfile:
                line['gpt4_rate'] = answer
                outfile.write(json.dumps(line) + "\n")
        return

    # Every attempt hit a transient error: hand the question back to the caller.
    unanswered_questions.append((question, thread_id, line))
        

def read_jsonline(sample_file):
    """Render the "option A" prompt fragment for each record of *sample_file*.

    ``sample_file`` must provide an ``iter()`` method yielding mappings that
    have a ``'captions'`` entry. The entry is converted with ``str`` and
    embedded after the ``The option A:`` prefix.

    Returns:
        A list with one formatted string per record, in iteration order.
    """
    return [
        "The option A: %s" % str(record['captions'])
        for record in sample_file.iter()
    ]
        
if __name__ == "__main__":
    # Dry-run switch: when True the assembled prompts are only printed; when
    # False each prompt is sent to GPT-4 on its own worker thread and prompts
    # that exhausted their retries get one more pass at the end.
    dry_run = True

    # Read every "option A" fragment up front; the reader is closed afterwards.
    with jsonlines.open('./data/winogavil/random_icl/swow/test.jsonl') as caption_file:
        corpus = read_jsonline(caption_file)

    file_lock = threading.Lock()
    threads = []
    unanswered_questions = []

    # The closing instruction is identical for every sample, so build it once.
    last_prompt = '''Please follow the same four step comparison method (Step 1: Initial Perception; Step 2: Recognizing Incongruity; Step 3: Contextual Analysis; Step 4: Linking to the Question) and analyze in each step which option is better. '''

    with jsonlines.open('./data/winogavil/cb_icl/swow/test.jsonl') as explanation_file:
        # The bar length follows the loaded data instead of a hand-edited number.
        with tqdm(desc='Process', unit='it', total=len(corpus)) as pbar:
            # enumerate keeps option A (from corpus) aligned with the record
            # that supplies option B; indexing fails loudly if the caption
            # file has fewer records than the explanation file.
            for num, line in enumerate(explanation_file.iter()):
                option_a = corpus[num]
                cue = line['cue']
                labels = line['labels']
                # Number of non-zero labels = how many sentences the question asks for.
                k = np.count_nonzero(labels)
                option_b = '''The option B: %s''' %(str(line['explanations']))
                start_prompt = '''Evaluate the equivalence of the following two captions list for the question "choose the top %d sentences most related to the cue %s from captions?" ''' %(k, cue)
                middle_prompt = '''%s; %s. ''' %(option_a, option_b)
                content = f'''{start_prompt}{middle_prompt}{last_prompt}{Example_prompt}'''

                if dry_run:
                    print(content)
                else:
                    thread = threading.Thread(
                        target=ask_gpt4,
                        args=(content, num + 1, file_lock, line, unanswered_questions))
                    threads.append(thread)
                    thread.start()
                pbar.update()

            for thread in threads:
                thread.join()

    if unanswered_questions:
        # Second and final pass for prompts that ran out of retries.
        still_unanswered = []
        retry_threads = []
        for question, thread_id, line in unanswered_questions:
            retry_thread = threading.Thread(
                target=ask_gpt4,
                args=(question, thread_id, file_lock, line, still_unanswered))
            retry_threads.append(retry_thread)
            retry_thread.start()

        for thread in retry_threads:
            thread.join()

        if still_unanswered:
            print(f"{len(still_unanswered)} question(s) remain unanswered after the retry pass")